library(dataRetrieval) #read in necessary libraries
library(plotly)
library(dplyr)
library(MASS)
library(knitr)
library(boot)
library(ggplot2)
library(data.table)
library(formattable)
library(tidyr)
TSS_Caz <- read.csv('~/AWQP/TSS.csv') %>%
dplyr::select(TSS..mg.L., Name, TSS) #read in data for each persons TSS Standards
mean_Caz = mean(TSS_Caz$TSS..mg.L.)
RMSE_Caz <- sqrt(mean((TSS_Caz$TSS - TSS_Caz$TSS..mg.L.)^2)) #find RSME
sd_Caz <- sd(TSS_Caz$TSS..mg.L., na.rm = FALSE) #find standard deviation
cv_Caz <- sd(TSS_Caz$TSS..mg.L.) / mean(TSS_Caz$TSS..mg.L.) * 100 #find cv
TSS_Justin <- read.csv('~/AWQP/TSS_Justin.csv') %>%
dplyr::select(TSS..mg.L., Name, TSS)
mean_Justin = mean(TSS_Justin$TSS..mg.L.)
RMSE_Justin <- sqrt(mean((TSS_Justin$TSS - TSS_Justin$TSS..mg.L.)^2)) #find RSME
sd_Justin <- sd(TSS_Justin$TSS..mg.L., na.rm = FALSE) #find standard deviation
cv_Justin <- sd(TSS_Justin$TSS..mg.L.) / mean(TSS_Justin$TSS..mg.L.) * 100 #find cv
TSS_Melina <- read.csv('~/AWQP/TSS_Melina.csv') %>%
dplyr::select(TSS..mg.L., Name, TSS)
mean_Mel = mean(TSS_Melina$TSS..mg.L.)
RMSE_Mel <- sqrt(mean((TSS_Melina$TSS - TSS_Melina$TSS..mg.L.)^2)) #find RSME
sd_Mel <- sd(TSS_Melina$TSS..mg.L., na.rm = FALSE) #find standard deviation
cv_Mel <- sd(TSS_Melina$TSS..mg.L.) / mean(TSS_Melina$TSS..mg.L.) * 100
TSS_Mia <- read.csv('~/AWQP/TSS_Mia_.csv') %>%
dplyr::select(TSS..mg.L., Name, TSS) %>%
na.omit()
mean_Mia = mean(TSS_Mia$TSS..mg.L.)
RMSE_Mia <- sqrt(mean((TSS_Mia$TSS - TSS_Mia$TSS..mg.L.)^2)) #find RSME
sd_Mia <- sd(TSS_Mia$TSS..mg.L., na.rm = FALSE) #find standard deviation
cv_Mia <- sd(TSS_Mia$TSS..mg.L.) / mean(TSS_Mia$TSS..mg.L.) * 100
TSS_bind1 <- rbind(TSS_Caz, TSS_Justin, TSS_Melina, TSS_Mia) %>% #combine individual data sets
na.omit() %>%
dplyr::select(TSS..mg.L., Name)
TSS <- TSS_bind1 %>%
group_by(Name) %>%
mutate(row = row_number()) %>%
tidyr::pivot_wider(names_from = Name, values_from = TSS..mg.L.) %>%
dplyr::select(-row) %>%
na.omit()
tab_stats <- matrix(c(mean_Caz,sd_Caz, RMSE_Caz, cv_Caz, mean_Justin, sd_Justin, RMSE_Justin, cv_Justin, mean_Mel, sd_Mel, RMSE_Mel, cv_Mel, mean_Mia,sd_Caz, RMSE_Mia, cv_Mia), ncol=4, byrow=TRUE) #create matrix for table
colnames(tab_stats) <- c('Mean(ppm)','Standard Deviation(ppm)','Root Mean Square Error(ppm)', 'Coefficient of Variation(%)') #label columns
rownames(tab_stats) <- c('Caz','Justin','Melina', 'Mia') #label rows
table_stats <- as.table(tab_stats)
formattable(table_stats) %>%
kable(caption = "Summary Statistics") #format table
Summary Statistics
| Caz |
57.27273 |
3.595733 |
8.040303 |
6.278263 |
| Justin |
65.33333 |
8.344437 |
17.256239 |
12.772098 |
| Melina |
56.29630 |
5.386311 |
8.089011 |
9.567789 |
| Mia |
60.37037 |
3.595733 |
12.765695 |
13.079145 |
fig <- plot_ly(TSS_bind1, x = ~Name, y = ~TSS..mg.L., color = ~Name, colors = c("red", "blue", "purple", 'green4')) #select values for plotly, assign colors
fig <- fig %>% add_markers(marker = list(line = list(color = ~Name, width = 1)))
fig <- fig %>% layout(
title = "TSS Standards", #title
xaxis = list(domain = c(0.1, 1)),
yaxis = list(title = "y"),
updatemenus = list(
list(
y = 0.8,
buttons = list(
list(method = "restyle", #set up box plot
args = list("type", "box"),
label = "Box plot"
),
list(method = "restyle", #set up violin plot
args = list("type", "violin"),
label = "Violin"
)))
))
fig
cols <- c("red", "blue", "purple", 'green4') #attach colors for each person
# Basic density plot in ggplot2
ggplot( TSS_bind1, aes(x = TSS..mg.L., colour = Name)) + #sort colors for names
geom_density(lwd = 1.2, linetype = 1) +
scale_color_manual(values = cols) + #use previous set colors
labs(x = 'TSS mg/L', y = 'Density',
title = 'TSS Kernel Density Estimation') #label the graph
